################################################################################
### Replication Code: Figure 3
################################################################################
#
# Paper: Rationalizing Protest Participation  
#
# Authors: Tim Baule, Jonathan Bothner, Maximilian Kähny
#
# Software: R version 4.4.0 using Windows
#
################################################################################

# load packages
library(tidyverse)  # tidyverse
library(readxl)     # excel commands
library(haven)      # dta compatibility
library(vtable)     # summary stats
library(openxlsx)   # write excel
library(extrafont)  # fontface for graphs
library(scales)     # scaling of y axis

# set-up
# Wipe every object from the global environment, then switch to the directory
# that holds the raw protest data.
# NOTE(review): the path is absolute and machine-specific; the script only runs
# where this directory exists.
rm(list = ls())
setwd("C:/Users/timba/OneDrive - Universität Bayreuth (1)/Uni/Research General/Inequality and Protests/Empirics/raw/CCC_protest data")

# load data (https://dataverse.harvard.edu/dataverse/crowdcountingconsortium)
alldata <- read_csv("ccc_compiled.csv")

# select years
# Convert the date column with a day/month/year pattern, then keep only the
# rows whose year is 2020 (rows with a missing date are dropped).
alldata[["date"]] <- as.Date(alldata[["date"]], format = "%d/%m/%Y")
in_sample <- year(alldata[["date"]]) %in% 2020
alldata <- alldata[in_sample, ]

# define categories
# For every issue of interest, add a logical column (named after the issue)
# that is TRUE when the issue name occurs in the event's `issues` text.
issues <- c("racism", "immigration", "healthcare", "environment")
alldata[issues] <- lapply(
  issues,
  function(topic) grepl(topic, alldata$issues)
)

# filter
# Keep only events flagged for at least one of the four issues.
alldata <- alldata %>%
  filter(racism | immigration | healthcare | environment)

# group
# Build `return_list`, a data frame with one row per issue and the columns
#   category          - the issue name
#   participants_mean - SUM of `size_mean` over the issue's events (missing
#                       sizes ignored); despite the name this is a total
#   event_count       - number of events flagged for the issue
# An event flagged for several issues contributes to each of them.
#
# Each row is computed independently and the rows are bound once at the end,
# so an issue without any matching event yields a row of zeros instead of
# aborting the script on an empty row assignment.
alldata$event_count <- 1
return_list <- do.call(rbind, lapply(issues, function(issue) {
  # %in% TRUE selects exactly the flagged rows and never yields NA
  tagged <- alldata[alldata[[issue]] %in% TRUE, ]
  data.frame(
    category = issue,
    participants_mean = sum(tagged$size_mean, na.rm = TRUE),
    event_count = sum(tagged$event_count)
  )
}))


# create Figure 3
# Bars show the summed participants per issue on the primary (left) axis.
# Points show the event count on the secondary (right) axis: the counts are
# divided by `scale` to sit on the primary axis and the secondary axis
# multiplies by `scale` again so its tick labels read as raw counts.
scale <- 0.005

# Axis labels keyed by factor level, so each bar always gets its own label
# regardless of the order in which the levels are listed.
issue_labels <- c(
  racism = "Racism",
  immigration = "Immigration",
  healthcare = "Healthcare",
  environment = "Environment"
)

# The summary built above lives in `return_list` (`table` is the base R
# function and cannot be subset with `$`).
return_list$category <- factor(
  return_list$category,
  levels = names(issue_labels)
)

p <- ggplot(subset(return_list, !is.na(category)), aes(x = category)) +
  geom_bar(
    aes(y = participants_mean, fill = "Total Participants"),
    stat = "identity",
    position = position_dodge()
  ) +
  geom_point(aes(y = event_count / scale, color = "Event Count"), size = 3) +
  scale_y_continuous(
    name = "Participation Number",
    labels = label_comma(),
    sec.axis = sec_axis(~ . * scale, name = "Event Count", labels = label_comma())
  ) +
  xlab("Category") +
  scale_fill_manual(name = "", values = c("Total Participants" = "grey70")) +
  scale_x_discrete(labels = issue_labels) +
  scale_color_manual(name = "", values = c("Event Count" = "black")) +
  theme_minimal() +
  theme(
    text = element_text(family = "Times New Roman", size = 14),
    axis.title = element_text(size = 18),
    legend.title = element_text(size = 18),
    legend.text = element_text(size = 14),
    legend.position = "bottom"
  )

# ggsave("Protest Event Frequency_issues.pdf", p, device = cairo_pdf, width = 13.4, height = 4)









